import pandas as pd
import seaborn as sns
import plotly
import plotly.express as px
import plotly.graph_objects as go
plotly.offline.init_notebook_mode(connected=True)
import matplotlib.pyplot as plt
def prepare_df(df_path, df_name):
    """Load one tab-separated table and tag every column with ``df_name``.

    The file is read with its first row as the header and must provide the
    columns ``id``, ``FDR`` and ``logCPM``. A ``Significant`` column is added
    that holds ``"FDR<0.05\\nlogCPM>2"`` for rows with FDR < 0.05 and
    logCPM > 2, and ``"No"`` for every other row.

    Returns a DataFrame whose first column is ``id`` and whose remaining
    columns (including ``Significant``) carry the suffix ``_<df_name>``.
    """
    table = pd.read_csv(df_path, header=0, sep="\t")

    # Flag the rows that pass both cut-offs; everything else stays "No".
    passes_cutoffs = (table["FDR"] < 0.05) & (table["logCPM"] > 2)
    flags = pd.Series("No", index=table.index)
    table["Significant"] = flags.mask(passes_cutoffs, "FDR<0.05\nlogCPM>2")

    # Park "id" in the index so it is the only column left unsuffixed, then
    # bring it back as the leading column.
    return table.set_index("id").add_suffix("_" + df_name).reset_index()
def plot_MA(df, df_name):
    """Draw an MA scatter plot (logCPM on x, logFC on y) for one table.

    ``df`` must contain the columns ``logCPM_<df_name>``, ``logFC_<df_name>``
    and ``Significant_<df_name>``. Rows labelled ``"No"`` in the significance
    column are drawn in black, every other label in red. The plot is created
    on a new seaborn figure; nothing is returned.
    """
    hue_column = f"Significant_{df_name}"

    # A list palette is handed out to hue levels in order of first appearance,
    # so ["black", "red"] would paint the significant rows black whenever the
    # first row of the table is significant. Map each level explicitly.
    palette = {
        level: "black" if level == "No" else "red"
        for level in df[hue_column].dropna().unique()
    }

    sns.set(font_scale=2)
    sns.lmplot(
        x=f"logCPM_{df_name}",
        y=f"logFC_{df_name}",
        data=df,
        fit_reg=False,  # scatter only, no regression line
        hue=hue_column,
        palette=palette,
        height=15,
        aspect=1,
    )
    # lmplot opens a fresh figure, so the current axes are the ones just drawn.
    plt.gca().set_title(f"MA plot for {df_name}")
def merge_df(df_1, df_2):
    """Join two tables on their shared ``id`` column.

    Only ids present in both inputs are kept (pandas' default merge). The
    result gets a ``Significant`` column set to ``"No"`` for every row; the
    inputs themselves are not modified.
    """
    return df_1.merge(df_2, on="id").assign(Significant="No")
def plot_FC_correlation(df, FC_1_name, FC_2_name, hue):
    """Scatter the logFC values of two samples against each other.

    ``df`` must contain ``logFC_<FC_1_name>`` (x axis), ``logFC_<FC_2_name>``
    (y axis) and the column named by ``hue``. Rows whose ``hue`` value is
    ``"No"`` are drawn in black, every other value in red. The plot is created
    on a new seaborn figure; nothing is returned.
    """
    # A list palette is handed out to hue levels in order of first appearance,
    # so ["black", "red"] would paint the flagged rows black whenever the
    # first row of the table is flagged. Map each level explicitly.
    palette = {
        level: "black" if level == "No" else "red"
        for level in df[hue].dropna().unique()
    }

    sns.set(font_scale=2)
    sns.lmplot(
        x=f"logFC_{FC_1_name}",
        y=f"logFC_{FC_2_name}",
        data=df,
        fit_reg=False,  # scatter only, no regression line
        hue=hue,
        palette=palette,
        height=15,
        aspect=1,
    )
    # lmplot opens a fresh figure, so the current axes are the ones just drawn.
    plt.gca().set_title(f"FCs between {FC_1_name} and {FC_2_name}")
def compare(df_1_path, df_2_path, df_1_name, df_2_name):
    """Run the whole two-sample workflow and return the merged table.

    Each table is loaded with ``prepare_df`` and shown as an MA plot. The two
    tables are then merged with ``merge_df`` and their logFC values are
    plotted against each other three times: coloured by the merged
    ``Significant`` column, then by each sample's own significance column.
    """
    prepared = []
    for path, name in ((df_1_path, df_1_name), (df_2_path, df_2_name)):
        table = prepare_df(path, name)
        plot_MA(table, name)
        prepared.append(table)

    df_merged = merge_df(*prepared)

    hue_columns = (
        "Significant",
        "Significant_" + df_1_name,
        "Significant_" + df_2_name,
    )
    for hue_column in hue_columns:
        plot_FC_correlation(df_merged, df_1_name, df_2_name, hue=hue_column)
    return df_merged
# Every input table sits at <results dir>/<contrast>/plot_small_RNAs/22G.tsv;
# the shared pieces are spelled out once and reused below.
_TABLE = "plot_small_RNAs/22G.tsv"
_IP_RESULTS = "../WAGO_3_IP/results"
_SEQ_15_C_RESULTS = "../small_RNA_seq_15_C/results"
_SEQ_25_C_RESULTS = "../small_RNA_seq_25_C/results"

# Each sample is described by a display name and the path of its table.
ip_name = "IP"
ip_path = f"{_IP_RESULTS}/DE__N2_Input_Rpph__N2_xf119_Rpph/{_TABLE}"

Dpf_3_null_name = "Dpf_3_null"
Dpf_3_null_path = f"{_SEQ_15_C_RESULTS}/DE__WT_dpf3__Dpf_3_null/{_TABLE}"

Dpf_3_S784A_name = "Dpf_3_S784A"
# The doubled slash reproduces the path exactly as it was written originally.
Dpf_3_S784A_path = f"{_SEQ_15_C_RESULTS}/DE__WT_dpf3__Dpf_3_S784A//{_TABLE}"

mut_2_name = "mut_2"
mut_2_path = f"{_SEQ_15_C_RESULTS}/DE__WT_other__mut_2/{_TABLE}"

mut_7_name = "mut_7"
mut_7_path = f"{_SEQ_15_C_RESULTS}/DE__WT_other__mut_7/{_TABLE}"

Dpf_3_null_first_name = "Dpf_3_null_first"
Dpf_3_null_first_path = f"{_SEQ_15_C_RESULTS}/DE__WT_dpf3_first__Dpf_3_null/{_TABLE}"

Dpf_3_S784A_second_name = "Dpf_3_S784A_second"
Dpf_3_S784A_second_path = f"{_SEQ_15_C_RESULTS}/DE__WT_dpf3_second__Dpf_3_S784A/{_TABLE}"

Dpf_3_null_old_name = "Dpf_3_null_old"
Dpf_3_null_old_path = f"{_SEQ_25_C_RESULTS}/DE__WT__dpf_3_delta/{_TABLE}"

dpf_3S784A_old_name = "Dpf_3_S784A_old"
dpf_3S784A_old_path = f"{_SEQ_25_C_RESULTS}/DE__WT__dpf_3S784A/{_TABLE}"
def _flag_joint_hits(df, name_1, name_2, fc_1_up):
    """Relabel ``df["Significant"]`` in place and return the number of hits.

    A row is a hit when, for both samples, FDR < 0.05 and logCPM > 2, the
    logFC of ``name_2`` is below 0, and the logFC of ``name_1`` is above 0
    (``fc_1_up=True``) or below 0 (``fc_1_up=False``). Hits receive a
    multi-line label spelling out the six cut-offs; all other rows are "No".
    """
    fc_1 = df[f"logFC_{name_1}"]
    fc_1_sign = ">" if fc_1_up else "<"
    is_hit = (
        (df[f"FDR_{name_1}"] < 0.05)
        & (df[f"FDR_{name_2}"] < 0.05)
        & ((fc_1 > 0) if fc_1_up else (fc_1 < 0))
        & (df[f"logFC_{name_2}"] < 0)
        & (df[f"logCPM_{name_1}"] > 2)
        & (df[f"logCPM_{name_2}"] > 2)
    )
    df["Significant"] = "No"
    df.loc[is_hit, "Significant"] = (
        f"FDR_{name_1}<0.05\nFDR_{name_2}<0.05\n"
        f"logFC_{name_1}{fc_1_sign}0\nlogFC_{name_2}<0\n"
        f"logCPM_{name_1}>2\nlogCPM_{name_2}>2"
    )
    return int((df["Significant"] != "No").sum())


# One entry per comparison, in the order they are run:
# (path 1, path 2, name 1, name 2, require logFC of sample 1 > 0?).
# The IP comparisons select rows that go up in sample 1 and down in sample 2;
# all remaining comparisons select rows that go down in both samples.
_COMPARISONS = [
    (ip_path, Dpf_3_null_path, ip_name, Dpf_3_null_name, True),
    (ip_path, Dpf_3_S784A_path, ip_name, Dpf_3_S784A_name, True),
    (ip_path, mut_2_path, ip_name, mut_2_name, True),
    (ip_path, mut_7_path, ip_name, mut_7_name, True),
    (Dpf_3_null_path, Dpf_3_S784A_path, Dpf_3_null_name, Dpf_3_S784A_name, False),
    (Dpf_3_null_first_path, Dpf_3_S784A_second_path,
     Dpf_3_null_first_name, Dpf_3_S784A_second_name, False),
    (Dpf_3_null_path, mut_2_path, Dpf_3_null_name, mut_2_name, False),
    (Dpf_3_null_path, mut_7_path, Dpf_3_null_name, mut_7_name, False),
    (Dpf_3_S784A_path, mut_2_path, Dpf_3_S784A_name, mut_2_name, False),
    (Dpf_3_S784A_path, mut_7_path, Dpf_3_S784A_name, mut_7_name, False),
    (Dpf_3_null_path, Dpf_3_null_old_path, Dpf_3_null_name, Dpf_3_null_old_name, False),
    (Dpf_3_S784A_path, dpf_3S784A_old_path, Dpf_3_S784A_name, dpf_3S784A_old_name, False),
]

# The loop variables are deliberately the same module-level names the script
# has always assigned, so after the loop they (and df_all) hold the values of
# the last comparison.
for df_1_path, df_2_path, df_1_name, df_2_name, _fc_1_up in _COMPARISONS:
    # Per-sample MA plots plus the three baseline correlation plots.
    df_all = compare(df_1_path, df_2_path, df_1_name, df_2_name)
    # Report how many rows pass all six cut-offs, then plot them highlighted.
    print(_flag_joint_hits(df_all, df_1_name, df_2_name, _fc_1_up))
    plot_FC_correlation(df_all, df_1_name, df_2_name, hue="Significant")
# Dpf_3_S784A_selected_22G_RNAs = Dpf_3_S784A[(Dpf_3_S784A["Significant_Dpf_3_S784A"] == "FDR<0.05\nlogCPM>2") & (Dpf_3_S784A["logFC_Dpf_3_S784A"] < 0)]["id"].tolist()
# IP_selected_22G_RNAs = ip[(ip["Significant_IP"] == "FDR<0.05\nlogCPM>2") & (ip["logFC_IP"] > 0)]["id"].tolist()
# len(IP_selected_22G_RNAs), len(Dpf_3_S784A_selected_22G_RNAs)
# overlap = [value for value in Dpf_3_S784A_selected_22G_RNAs if value in IP_selected_22G_RNAs]
# len(overlap)